## Import necessary libraries and dataframe
import pandas as pd
# Load the exported LinkedIn connections.
# A forward slash is used because "\C" is an invalid escape sequence in a normal
# string literal, and a backslash separator only resolves on Windows.
data = pd.read_csv("Data/Connections.csv")
data.head(10)
| | First Name | Last Name | URL | Email Address | Company | Position | Connected On |
|---|---|---|---|---|---|---|---|
| 0 | Wells | Velasquez Maciel | https://www.linkedin.com/in/wells-velasquez-ma... | NaN | SiteMinder | Customer Solutions Consultant Level 3 | 11 Sep 2024 |
| 1 | Patrick | Canney | https://www.linkedin.com/in/paddyblack-exe | NaN | SOTI | Senior Technical Support Specialist | 11 Sep 2024 |
| 2 | Borja | Martínez Ariza | https://www.linkedin.com/in/borja-mart%C3%ADne... | NaN | Abbott | Customer Service - Order Management Specialist | 30 Apr 2024 |
| 3 | Krutik | Pednekar | https://www.linkedin.com/in/krutikpednekar | NaN | Trane Technologies | Standards Lead | 24 Apr 2024 |
| 4 | Nathan | Woodward | https://www.linkedin.com/in/nathan-woodward-42... | NaN | Avondata Systems Ltd | Client Success Manager | 23 Apr 2024 |
| 5 | RIGAN | NGANGOM | https://www.linkedin.com/in/rigan-ngangom-17b0... | NaN | EDoors Inc | Senior Team Lead | 21 Feb 2024 |
| 6 | Chiara | Celidoni | https://www.linkedin.com/in/chiaracelidoni | NaN | SiteMinder | Strategic Account Director | 21 Dec 2023 |
| 7 | Daire | O'Neill | https://www.linkedin.com/in/daire-o-neill-3584762 | NaN | Irish Life Investment Managers | Manager Reporting: Performance Measurement Team | 21 Dec 2023 |
| 8 | Dustin | Aldridge | https://www.linkedin.com/in/dustin-aldridge-98... | NaN | SiteMinder | Premium Services Manager | 01 Dec 2023 |
| 9 | Rita | Guembes, MCI | https://www.linkedin.com/in/ritaguembesc | NaN | SiteMinder | Customer Onboarding Success Specialist | 30 Nov 2023 |
## Sort data by connection date and visualize
# 'Connected On' still holds the date strings read from the CSV at this point
# (e.g. "11 Sep 2024"), so this sort orders them as text, not chronologically.
data = data.sort_values('Connected On')
import plotly.express as px

# One row per distinct 'Connected On' value; after count() each remaining
# column holds the number of non-null entries for that value.
daily_counts = data.groupby('Connected On').count().reset_index()
px.line(
    daily_counts,
    x='Connected On',
    y='First Name',
    labels={'First Name': 'No. of Connections'},
    title='Connection Timeline',
)
## Now converting 'Connected On' to a datetime and re-creating the visual
# Parse strings such as "11 Sep 2024" into datetimes so that sorting and the
# plot's x-axis follow calendar order.
parsed_dates = pd.to_datetime(data['Connected On'], format='%d %b %Y')
data['Connected On'] = parsed_dates
data = data.sort_values('Connected On')
grouped_data = data.groupby('Connected On').count().reset_index()
## Create the line plot
# The per-date count of non-null 'First Name' values stands in for the number
# of connections made on that date.
line_options = {
    'x': 'Connected On',
    'y': 'First Name',
    'labels': {'First Name': 'No. of Connections'},
    'title': 'Connection Timeline',
}
fig = px.line(grouped_data, **line_options)
fig.show()
# Create a new column with the year and month
# Monthly period (YYYY-MM) of each connection date
data['YearMonth'] = data['Connected On'].dt.to_period('M')
# Number of rows (connections) that fall in each month
monthly_data = (
    data.groupby('YearMonth')
    .size()
    .rename('ConnectionCount')
    .reset_index()
)
# Busiest months first, then keep the first ten
busiest_first = monthly_data.sort_values('ConnectionCount', ascending=False)
top_months = busiest_first.head(10)
# Show the ten months with the most connections
print(top_months)
   YearMonth  ConnectionCount
39   2020-04               46
2    2017-02               39
23   2018-12               33
32   2019-09               27
6    2017-06               16
4    2017-04               15
52   2021-05               13
18   2018-07               13
5    2017-05               12
36   2020-01               11
## Now view the company column of each connection
# Display the employer recorded for every connection (all rows, 'Company' column only)
data.loc[:, 'Company']
496 SAP SuccessFactors
495 adidas
490 Qualtrics
493 Employment Hero
494 Cisco
...
4 Avondata Systems Ltd
3 Trane Technologies
2 Abbott
1 SOTI
0 SiteMinder
Name: Company, Length: 497, dtype: object
## Group and count the connections by company
# For each company, count the non-null entries of every other column, then
# turn 'Company' back into an ordinary column.
company_groups = data.groupby('Company')
group_company = company_groups.count().reset_index()
group_company
| | Company | First Name | Last Name | URL | Email Address | Position | Connected On | YearMonth |
|---|---|---|---|---|---|---|---|---|
| 0 | 7AJ WORLD ENTERTAINMENT | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 1 | 7th Heaven | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 2 | AIB | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 3 | AIHR \| Academy to Innovate HR | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 4 | AKOFENA RENTALS LIMITED | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 290 | owl.co | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 291 | permanent tsb | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 292 | truelink Consulting GmbH | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 293 | www.cestandard.com | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 294 | 𝙃𝙖𝙧𝙢𝙤𝙣𝙞𝙘 𝘿𝙞𝙜𝙞𝙩𝙖𝙡 𝙈𝙖𝙧𝙠𝙚𝙩𝙞𝙣𝙜 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
295 rows × 8 columns
## Sorting the results by number of connections per company
# After the earlier count(), 'Connected On' holds the per-company number of
# connections; order companies by it (largest first) and renumber the rows.
group_company = (
    group_company
    .sort_values('Connected On', ascending=False)
    .reset_index(drop=True)
)
group_company
| | Company | First Name | Last Name | URL | Email Address | Position | Connected On | YearMonth |
|---|---|---|---|---|---|---|---|---|
| 0 | SiteMinder | 100 | 100 | 100 | 3 | 100 | 100 | 100 |
| 1 | SAP | 38 | 38 | 38 | 0 | 38 | 38 | 38 |
| 2 | Genesys | 5 | 5 | 5 | 0 | 5 | 5 | 5 |
| 3 | Salesforce | 4 | 4 | 4 | 0 | 4 | 4 | 4 |
| 4 | NUI Galway | 4 | 4 | 4 | 0 | 4 | 4 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 290 | GuestCentric Systems | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 291 | HERO Recruitment Ltd. | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 292 | HID | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 293 | Hayes solicitors LLP | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
| 294 | 𝙃𝙖𝙧𝙢𝙤𝙣𝙞𝙘 𝘿𝙞𝙜𝙞𝙩𝙖𝙡 𝙈𝙖𝙧𝙠𝙚𝙩𝙞𝙣𝙜 | 1 | 1 | 1 | 0 | 1 | 1 | 1 |
295 rows × 8 columns
# Chart only the first 150 rows of the company table
bar_source = group_company.head(150)
fig = px.bar(
    bar_source,
    x='Company',
    y='Connected On',
    labels={'Connected On': 'Number of Connections'},
    width=1000,
    height=800,
    title='Bar graph for companies that my connections work at',
)
fig.show()
## Creating a treemap visualization with fewer companies
# In group_company the 'Position' column is a count, not a job title, so it
# cannot serve as the second treemap level. Rebuild the (company, position)
# breakdown from the row-level data for the first 100 companies instead.
top_companies = group_company['Company'][:100]
company_positions = (
    data[data['Company'].isin(top_companies)]
    # groupby drops rows whose 'Position' is missing, so those connections
    # are not represented in the treemap.
    .groupby(['Company', 'Position'])
    .size()
    .reset_index(name='Connections')
)
fig = px.treemap(
    company_positions,
    path=['Company', 'Position'],
    values='Connections',
    labels={'Connections': 'Number of Connections'},
    width=1000,
    height=800,
    # The original title said "Bar graph", copied from the previous chart.
    title='Treemap of the companies and positions of my connections',
)
fig.show()
C:\ProgramData\Anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. C:\ProgramData\Anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
## Finding the positions of each connection
# How many connections hold each distinct position title (missing titles excluded)
data.loc[:, 'Position'].value_counts(dropna=True)
Director 5
Manager 5
Project Manager 3
Business Development Manager 3
Account Executive 3
..
Partner Integrations Team Leader 1
Developer 1
L&D Trainer 1
Software Tester 1
Customer Solutions Consultant Level 3 1
Name: Position, Length: 442, dtype: int64
## Now only going to show the positions that account for more than 0.20% of all connections
# Count each position once and reuse the result instead of recomputing it.
position_counts = data['Position'].value_counts()
# Boolean mask: True where a position's share of all rows exceeds the threshold.
# The share is expressed in percent, so 0.20 means 0.20 %, not 20 %.
position_data = position_counts / len(data) * 100 > 0.20
## Get the count of these values
position_counts[position_data]
Director 5
Manager 5
Project Manager 3
Business Development Manager 3
Account Executive 3
..
Partner Integrations Team Leader 1
Developer 1
L&D Trainer 1
Software Tester 1
Customer Solutions Consultant Level 3 1
Name: Position, Length: 442, dtype: int64
# Create the bar chart
# Number of connections per position, most common first
positions_by_count = (
    data.groupby('Position')
    .size()
    .reset_index(name='Count')
    .sort_values(by='Count', ascending=False)
)
# Create the bar chart
fig = px.bar(
    positions_by_count,
    x='Position',
    y='Count',
    # `labels` maps column name -> displayed label; the original mapping was
    # inverted and keyed on a non-existent column, so it had no effect.
    labels={'Count': 'Number of Connections'},
    width=1000,
    height=900,
    title='The various positions held by my LinkedIn connections',
)
# Show the plot
fig.show()
# Notebook-only cell: this line is not Python syntax. It presumably relies on
# IPython running it as the %pip magic to install the packages used below.
pip install wordcloud matplotlib
Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages. Requirement already satisfied: wordcloud in c:\programdata\anaconda3\lib\site-packages (1.8.2.2) Requirement already satisfied: matplotlib in c:\programdata\anaconda3\lib\site-packages (3.5.1) Requirement already satisfied: numpy>=1.6.1 in c:\programdata\anaconda3\lib\site-packages (from wordcloud) (1.21.5)
[notice] A new release of pip is available: 23.0.1 -> 24.2 [notice] To update, run: python.exe -m pip install --upgrade pip
Requirement already satisfied: pillow in c:\programdata\anaconda3\lib\site-packages (from wordcloud) (9.0.1) Requirement already satisfied: fonttools>=4.22.0 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (4.25.0) Requirement already satisfied: packaging>=20.0 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (21.3) Requirement already satisfied: python-dateutil>=2.7 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (2.8.2) Requirement already satisfied: cycler>=0.10 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (0.11.0) Requirement already satisfied: pyparsing>=2.2.1 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (3.0.4) Requirement already satisfied: kiwisolver>=1.0.1 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (1.3.2) Requirement already satisfied: six>=1.5 in c:\programdata\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def CreateWordCloud(text):
    """Render *text* as a word cloud, show it, and return the figure.

    The cloud is generated with width=1000, height=900, a black background,
    a minimum font size of 10 and the 'Set2' colormap. It is drawn on a new
    matplotlib figure (figsize 15x10) and displayed with ``plt.show()``
    before the figure object is returned.
    """
    cloud_options = dict(
        width=1000,
        height=900,
        background_color='black',
        min_font_size=10,
        colormap='Set2',
    )
    cloud = WordCloud(**cloud_options).generate(text)

    figure = plt.figure(figsize=(15, 10))
    plt.imshow(cloud, interpolation='bilinear')
    plt.show()
    return figure
## Converting Position data to a string
# Keep only the position titles that are present, as strings
position_titles = data['Position'].dropna().astype(str)
# Join every title into one space-separated string for the word cloud
positions_text = ' '.join(position_titles)
# Draw the word cloud from the combined text
CreateWordCloud(positions_text)